Import the data (from the decontamination script) and create a supragingival dataset

Note that I also ran the EMS statistics in two separate turnover scripts. The output from these will be loaded here.

## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, cbind, colMeans,
##     colnames, colSums, do.call, duplicated, eval, evalq, Filter,
##     Find, get, grep, grepl, intersect, is.unsorted, lapply,
##     lengths, Map, mapply, match, mget, order, paste, pmax,
##     pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce,
##     rowMeans, rownames, rowSums, sapply, setdiff, sort, table,
##     tapply, union, unique, unsplit, which, which.max, which.min
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
## 
##     expand.grid
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:phyloseq':
## 
##     distance
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:phyloseq':
## 
##     sampleNames
## Loading required package: DelayedArray
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## The following objects are masked from 'package:genefilter':
## 
##     rowSds, rowVars
## 
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
## 
##     colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following object is masked from 'package:base':
## 
##     apply

Discovery datatset

discovery = readRDS("~/Dropbox/Periodontology2000/katie_biogeo_tree.RDS")
discovery
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 1679 taxa and 84 samples ]
## sample_data() Sample Data:       [ 84 samples by 61 sample variables ]
## tax_table()   Taxonomy Table:    [ 1679 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 1679 tips and 1677 internal nodes ]
#how many subjects
levels(sample_data(discovery)$Subject)
## [1] "72"  "73"  "XC3"
#what's the median sequencing depth
summary(sample_sums(discovery))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       2   22145   27413   28717   35102   75977

Description of data

decontam.phy=readRDS("~/Desktop/Periodontol2000/periodontol_moresubjects.RDS")
decontam.phy = subset_samples(decontam.phy, Subject !="Control")
#how many samples are in the validation dataset?
nsamples(decontam.phy)
## [1] 663
#how many subjects are in the validation dataset?
length(levels(sample_data(decontam.phy)$Subject))
## [1] 8
#how many healthy aim 1 subjects are in the validation dataset?
aim1 = subset_samples(decontam.phy, Aim=="SA1" | Aim=="Pilot")
length(levels(sample_data(aim1)$Subject))
## [1] 5
#how many healthy aim 3 subjects are in the validation dataset?
aim3 = subset_samples(decontam.phy, Aim=="SA3")
length(levels(sample_data(aim3)$Subject))
## [1] 3
#how many subgingival samples
sub = subset_samples(decontam.phy, Habitat_Class=="Sub")
sub = prune_taxa(taxa_sums(sub) > 0, sub)
sub = prune_samples(sample_sums(sub) > 0, sub)

#what subgingival samples were collected
#what sample sites were sampled
sub = subset_samples(decontam.phy, Habitat_Class=="Sub")
levels(sample_data(sub)$Tooth_Number)
##  [1] "11" "14" "19" "22" "24" "25" "27" "3"  "30" "6"  "8"  "9"
#how many supragingival samples
supra = subset_samples(decontam.phy, Habitat_Class=="Supra")


#how many ASVs are there
decontam.phy
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 625 taxa and 663 samples ]
## sample_data() Sample Data:       [ 663 samples by 30 sample variables ]
## tax_table()   Taxonomy Table:    [ 625 taxa by 7 taxonomic ranks ]
#write a csv and contruct a highest taxonomic rank degination in the taxa table
df = data.frame(tax_table(decontam.phy))
#write.csv(df, file="periodontology2000_tax.csv")
newtax= read.csv("~/Desktop/Projects/Stanford/RelmanLab/Hyposalivation/Periodontol2000/periodontology2000_tax.csv", row.names = 1)
newtax = as.matrix(newtax)
newtax= tax_table(newtax)
tax_table(decontam.phy)= newtax


#replae taxa names with the highest label
df=data.frame(tax_table(decontam.phy))
taxa_names(decontam.phy)=df$Highest.Rank

#what is the median sequencing depth of the validation dataset?
summary(sample_sums(decontam.phy))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       1   27408   53636   52310   76684  133778
df = data.frame(sample_sums(decontam.phy), sample_data(decontam.phy))
colnames(df)[1] = "depth"

p = ggplot(df, aes(depth, color=Habitat_Class)) + geom_density()
p

mysum = doBy::summaryBy(depth~Habitat_Class, df, FUN=c(median, mean, length, sum, min, max))
mysum
##   Habitat_Class depth.median depth.mean depth.length depth.sum depth.min
## 1         Supra      70382.5   62679.78          440  27579102         1
## 2           Sub      30174.0   31848.26          223   7102161      4345
##   depth.max
## 1    133778
## 2     80403

Description of data: Phyla level composition survey

  • These results are described in the main text
  • Supplementary Table 1: Top 5 Phyla account for most of the reads
#how many unique phyla are found?
length(get_taxa_unique(decontam.phy, taxonomic.rank="Phylum"))
## [1] 15
#What is the relative abundance of each Phylum?
library(doBy)
phy <- tax_glom(decontam.phy, taxrank="Phylum")
      tax.count <- data.frame(taxa_sums(phy),tax_table(phy)[,2])
      rownames(tax.count)= NULL
      colnames(tax.count)[1] <- c("Abundance")
      tax.count$Percent <- round(tax.count$Abundance/sum(tax.count$Abundance)*100, 4)
      library(plyr)
## 
## Attaching package: 'plyr'
## The following object is masked from 'package:matrixStats':
## 
##     count
## The following object is masked from 'package:IRanges':
## 
##     desc
## The following object is masked from 'package:S4Vectors':
## 
##     rename
      Phylum_df <- arrange(tax.count, desc(Percent))
      colnames(Phylum_df) = c("Total.Abundance", "Phylum", "Total.Percent")
      
      
      #how much do the top 5 phyla contribute to total abundance?
      top5 <- Phylum_df[1:5, ]
      round(sum(top5$Percent),2)
## [1] 0
      #What are the top 5 Phyla?
      top5
##   Total.Abundance         Phylum Total.Percent
## 1        14981045     Firmicutes       43.2156
## 2         7412528 Proteobacteria       21.3828
## 3         6535861 Actinobacteria       18.8539
## 4         3810015  Bacteroidetes       10.9907
## 5         1695931   Fusobacteria        4.8922
##################what about the subgingival samples
subgingival.phy = subset_samples(decontam.phy, Habitat_Class=="Sub")
      phy <- tax_glom(subgingival.phy, taxrank="Phylum")
      tax.count <- data.frame(taxa_sums(phy),tax_table(phy)[,2])
      rownames(tax.count)= NULL
      colnames(tax.count)[1] <- c("Abundance")
      tax.count$Percent <- round(tax.count$Abundance/sum(tax.count$Abundance)*100, 4)
      Phylum_sub <- arrange(tax.count, desc(Percent))
      Phylum_sub <- subset(Phylum_sub, Abundance > 0)
      colnames(Phylum_sub) = c("Sub.Abundance", "Phylum" ,"Sub.Percent")
      
##################what about the supragingival samples
supragingival.phy = subset_samples(decontam.phy, Habitat_Class=="Supra")
      phy <- tax_glom(supragingival.phy, taxrank="Phylum")
      tax.count <- data.frame(taxa_sums(phy),tax_table(phy)[,2])
      rownames(tax.count)= NULL
      colnames(tax.count)[1] <- c("Abundance")
      tax.count$Percent <- round(tax.count$Abundance/sum(tax.count$Abundance)*100, 4)
      Phylum_supra <- arrange(tax.count, desc(Percent))
      Phylum_suora <- subset(Phylum_supra, Abundance > 0)
      colnames(Phylum_supra) = c("Supra.Abundance", "Phylum" ,"Supra.Percent")


#plot the phylum level differenes
df = join(Phylum_sub, Phylum_supra,  by="Phylum")
df= join(df, Phylum_df, by="Phylum")
dfm = melt(df, id.vars = c("Sub.Abundance", "Supra.Abundance", "Phylum", "Total.Abundance"))
fig1a = ggplot(dfm, aes(variable, value, fill=Phylum)) + geom_bar(position="stack", stat="identity") +
        scale_x_discrete(labels=c("Sub.Percent" = "Subgingival", "Supra.Percent" = "Supragingival", "Total.Percent" = "Totals")) +
        xlab("Habitat Class") + ylab("Percent Relative Abundance")

#test differences using proportion test
    testme = as.matrix(cbind(df[,3], df[,5]))
    prop.test(testme)
## Warning in prop.test(testme): Chi-squared approximation may be incorrect
## 
##  8-sample test for equality of proportions without continuity
##  correction
## 
## data:  testme
## X-squared = 50.353, df = 7, p-value = 1.232e-08
## alternative hypothesis: two.sided
## sample estimates:
##    prop 1    prop 2    prop 3    prop 4    prop 5    prop 6    prop 7 
## 0.8234193 0.5609600 0.2510159 0.8861476 0.3021838 0.9844929 0.9387453 
##    prop 8 
## 0.1086957

Description of data: genus-level summary of data

  • This is described in the main text
  • Supplementary Table 2: Top 10 Genera account for most of the reads
#What is the number of Genera found?
length(get_taxa_unique(decontam.phy, taxonomic.rank="Genus"))
## [1] 146
#what are the abundance levels of each genus?
supra.genus <- tax_glom(decontam.phy, taxrank="Genus")
tax.count <- data.frame(tax_table(supra.genus)[,2:7], taxa_sums(supra.genus))
rownames(tax.count) = NULL
colnames(tax.count) <- c("Phylum","Class","Order","Family","Genus", "Species",  "Abundance")
tax.count$Percent <- round(tax.count$Abundance/sum(tax.count$Abundance)*100, 4)
library(plyr)
Genus_df <- arrange(tax.count, desc(Percent))
Genus_df <- Genus_df[order(Genus_df$Percent, decreasing=TRUE), ] 

#how much do the top 10 genera contribute to total abundance?
top10 <- Genus_df[1:10, ]
round(sum(top10$Percent),3)
## [1] 85.584
#what about the top 5
top5 <- Genus_df[1:5, ]
round(sum(top5$Percent),3)
## [1] 68.078
###How diverse are the top 10 genera? i.e., how many species are there per genus?
top10 <- as.vector(Genus_df$Genus[1:10])
Diversity.list <- vector("list", 10)
names(Diversity.list) <- top10

for(i in 1:length(top10)){
       physub = subset_taxa(decontam.phy, Genus==top10[i])
       physub = prune_taxa(taxa_sums(physub) > 0, physub)
       Diversity.list[[i]] <- physub
}

#compute the number of taxa in each element of the list
Ntaxa <- data.frame(unlist(lapply(Diversity.list, ntaxa)))

colnames(Ntaxa) <- "N.Species"
#Make a table with percent abundance and number of taxa
genus.tab <- data.frame(Genus_df[1:10,], Ntaxa)
library(knitr)
kable(genus.tab, format="markdown")
Phylum Class Order Family Genus Species Abundance Percent N.Species
Firmicutes Bacilli Lactobacillales Streptococcaceae Streptococcus NA 9749877 33.0018 10
Actinobacteria Actinobacteria Actinomycetales Micrococcaceae Rothia NA 3747361 12.6842 5
Firmicutes Negativicutes Selenomonadales Veillonellaceae Veillonella NA 2774858 9.3925 9
Proteobacteria Betaproteobacteria Neisseriales Neisseriaceae Neisseria NA 1970888 6.6711 11
Bacteroidetes Bacteroidia Bacteroidales Prevotellaceae Prevotella NA 1869542 6.3281 50
Actinobacteria Actinobacteria Actinomycetales Corynebacteriaceae Corynebacterium NA 1372420 4.6454 4
Actinobacteria Actinobacteria Actinomycetales Actinomycetaceae Actinomyces NA 1179575 3.9927 15
Fusobacteria Fusobacteriia Fusobacteriales Fusobacteriaceae Fusobacterium NA 1092125 3.6967 22
Firmicutes Bacilli Lactobacillales Aerococcaceae Abiotrophia NA 800269 2.7088 1
Firmicutes Bacilli Bacillales Bacillales_Incertae_Sedis_XI Gemella NA 727480 2.4624 2
write.csv(genus.tab, file="TableS2.csv")

how does alpha diversity vary?

mi= subset_samples(decontam.phy, Tooth_Class %in% c("Incisor_Central", "Molar"))
mi = prune_samples(sample_sums(mi) > 0, mi)

subgingival.phy = subset_samples(mi, Habitat_Class=="Sub")
subgingival.phy = prune_taxa(taxa_sums(subgingival.phy) > 0, subgingival.phy)
subgingival.phy = prune_samples(sample_sums(subgingival.phy) > 0, subgingival.phy)


supragingival.phy = subset_samples(mi, Habitat_Class=="Supra")
supragingival.phy = prune_taxa(taxa_sums(supragingival.phy) > 0, supragingival.phy)
supragingival.phy = prune_samples(sample_sums(supragingival.phy) > 0, supragingival.phy)


meths = c("Shannon", "Chao1", "Simpson")
sites = levels(as.factor(sample_data(subgingival.phy)$Habitat))
sub.holder <-vector('list',length(sites)) 
supra.holder <- vector('list', length(sites))
prare.holder <- vector('list', length(sites))
brare.holder  <- vector('list', length(sites))

for(i in length(sites))
  {
  # Estimate diversity and make data frame
  erDF <- estimate_richness(subgingival.phy, split = TRUE, measures = meths)
  df <- data.frame(erDF, sample_data(subgingival.phy))
  sub.holder[[i]] <- df
  
}
## Warning in estimate_richness(subgingival.phy, split = TRUE, measures = meths): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
## 
## We recommended that you find the un-trimmed data and retry.
for(i in length(sites))
  {
  # Estimate diversity and make data frame
  erDF <- estimate_richness(supragingival.phy, split = TRUE, measures = meths)
  df <- data.frame(erDF, sample_data(supragingival.phy))
  supra.holder[[i]] <- df
  
}

#rarefy the supragingival samples to 
set.seed(191)
supra.rare = rarefy_even_depth(supragingival.phy, sample.size=20000, replace=FALSE)
## You set `rngseed` to FALSE. Make sure you've set & recorded
##  the random seed of your session for reproducibility.
## See `?set.seed`
## ...
## 19 samples removedbecause they contained fewer reads than `sample.size`.
## Up to first five removed samples are:
## 1101C01109R001104C01109R001104C01224R001102C01109R001102C01219R00    
## ...
## 15OTUs were removed because they are no longer 
## present in any sample after random subsampling
## ...
for(i in length(sites))
  {
  # Estimate diversity and make data frame
  erDF <- estimate_richness(supra.rare, split = TRUE, measures = meths)
  df <- data.frame(erDF, sample_data(supra.rare))
  prare.holder[[i]] <- df
  
}

#rarefy the supragingival samples to 
set.seed(191)
sub.rare = rarefy_even_depth(subgingival.phy, sample.size=20000, replace=FALSE)
## You set `rngseed` to FALSE. Make sure you've set & recorded
##  the random seed of your session for reproducibility.
## See `?set.seed`
## 
## ...
## 29 samples removedbecause they contained fewer reads than `sample.size`.
## Up to first five removed samples are:
## 1102C01314R001102C01319R001102C01330R001106C01303R081106C01309R08    
## ...
## 1OTUs were removed because they are no longer 
## present in any sample after random subsampling
## ...
brare.holder <- vector('list', length(sites))

for(i in length(sites))
  {
  # Estimate diversity and make data frame
  erDF <- estimate_richness(sub.rare, split = TRUE, measures = meths)
  df <- data.frame(erDF, sample_data(sub.rare))
  brare.holder[[i]] <- df
  
}
df1 = do.call("rbind", sub.holder)
df2 = do.call("rbind", supra.holder)
df3 = do.call("rbind", prare.holder)
df3$Habitat_Class = "Rarefied_Supra"
df4 = do.call("rbind", brare.holder)
df4$Habitat_Class = "Rarefied_Sub"


df = data.frame(rbind(df1, df2, df3, df4))
dfm1 = melt(df, id.vars=colnames(sample_data(supragingival.phy)))
dfm1= subset(dfm1, variable !="se.chao1")
p1 = ggplot(dfm1, aes(Habitat_Class, value)) + facet_wrap(~variable, scales="free_y", ncol=4) + geom_boxplot()
p1

#test to see whether alpha diversity varies based on tooth class
df = subset(df, Habitat_Class %in% c("Rarefied_Supra", "Rarefied_Sub" ))
wilcox.test(Chao1~Habitat_Class, data=df, exact=TRUE, conf.int=TRUE)
## Warning in wilcox.test.default(x = c(67, 56.3333333333333, 40, 56, 56,
## 47.25, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(67, 56.3333333333333, 40, 56, 56,
## 47.25, : cannot compute exact confidence intervals with ties
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Chao1 by Habitat_Class
## W = 14116, p-value = 3.642e-07
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##   6.75005 14.33328
## sample estimates:
## difference in location 
##               10.49998
wilcox.test(Shannon~Habitat_Class, data=df, exact=TRUE, conf.int=TRUE)
## 
##  Wilcoxon rank sum test
## 
## data:  Shannon by Habitat_Class
## W = 6761, p-value = 1.662e-07
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  -0.4452602 -0.2030296
## sample estimates:
## difference in location 
##             -0.3244488
wilcox.test(Simpson~Habitat_Class, data=df, exact=TRUE, conf.int=TRUE)
## 
##  Wilcoxon rank sum test
## 
## data:  Simpson by Habitat_Class
## W = 6815, p-value = 2.536e-07
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  -0.05794179 -0.02556212
## sample estimates:
## difference in location 
##            -0.04179579

Figure 1, Difference between microbial habitat

library(RColorBrewer)
### Create an index tooth biogeography subset
teeth = c("3", "8", "9", "14", "19", "24", "25", "30")
index_subsites <- subset_samples(decontam.phy, Tooth_Number %in% teeth)
index_subsites
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 625 taxa and 279 samples ]
## sample_data() Sample Data:       [ 279 samples by 30 sample variables ]
## tax_table()   Taxonomy Table:    [ 625 taxa by 10 taxonomic ranks ]
#how many samples are in average 2 subjects? Looks like about 25% of samples
summary(sample_data(index_subsites)$Subject)
## 1-101 1-102 1-104 1-105 1-106 3-301 3-302 3-303 
##    29    33    32    32    57    32    32    32
#filter the taxa to include those present in 25% of samples
filtergroup = filterfun(kOverA(k=70, A=1)) #k = number of samples; A = abundance
        index25 = filter_taxa(index_subsites, filtergroup, prune=TRUE) 
        index25 = prune_taxa(taxa_sums(index25) > 0, index25) 
        index25 = prune_samples(sample_sums(index25) > 0, index25) 
        index25
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 71 taxa and 279 samples ]
## sample_data() Sample Data:       [ 279 samples by 30 sample variables ]
## tax_table()   Taxonomy Table:    [ 71 taxa by 10 taxonomic ranks ]
#convert all the data to relative abundance        
indexRA = transform_sample_counts(index25, function(x) x / sum(x))

#Do PCOA
index.pcoa <- ordinate(indexRA, method="PCoA", distance="bray")
evals <- index.pcoa$values$Eigenvalues

### Use DESEq2 to identify taxa that are differentially abundant between subgingival and supragingival sites
diagdds = phyloseq_to_deseq2(index25, ~ Habitat_Class)
## converting counts to integer mode
# calculate geometric means prior to estimate size factors
gm_mean = function(x, na.rm=TRUE){
  exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x))
}
geoMeans = apply(counts(diagdds), 1, gm_mean)
diagdds = estimateSizeFactors(diagdds, geoMeans = geoMeans)
diagdds = DESeq(diagdds, fitType="local")
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 29 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
res = results(diagdds, cooksCutoff = FALSE)
alpha = 0.05
sigtab = res[which(res$padj < alpha), ]
sigtab = cbind(as(sigtab, "data.frame"), as(tax_table(index25)[rownames(sigtab), ], "matrix"))



posigtab = sigtab[sigtab[, "log2FoldChange"] > 5, ]
posigtab = posigtab[, c("baseMean", "log2FoldChange", "lfcSE", "padj", "Phylum", "Class", "Family", "Genus")]

library("ggplot2")
sigtabgen = subset(sigtab, !is.na(Genus))
# Phylum order
x = tapply(sigtabgen$log2FoldChange, sigtabgen$Phylum, function(x) max(x))
x = sort(x, TRUE)
sigtabgen$Phylum = factor(as.character(sigtabgen$Phylum), levels=names(x))
# Genus order
x = tapply(sigtabgen$log2FoldChange, sigtabgen$Genus, function(x) max(x))
x = sort(x, TRUE)
sigtabgen$Genus = factor(as.character(sigtabgen$Genus), levels=names(x))
sigtabgen = subset(sigtabgen, padj < 0.05)
fig1a=ggplot(sigtabgen, aes(y=Genus, x=log2FoldChange, color=Phylum)) + 
      geom_vline(xintercept = 0.0, color = "gray", size = 0.5) +
        geom_point(size=6) + 
      theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5))

print(fig1a)

#look at all the data as a function of tooth class

fig1b=plot_ordination(indexRA, index.pcoa, type="samples", color="Habitat_Class") + 
        geom_point(size=2, alpha=0.1) +  facet_wrap(~Tooth_Aspect)+ coord_fixed(sqrt(evals[2] / evals[1]))+ 
        scale_color_discrete(name="Habitat Class",
                         breaks=c("Supra", "Sub"),
                         labels=c("Supragingival Plaque", "Subgingival Plaque"))

fig1c=plot_ordination(indexRA, index.pcoa, type="samples", color="Aim") + geom_point(size=2, alpha=0.1) + 
        theme_bw()  + facet_wrap(~Tooth_Aspect)+coord_fixed(sqrt(evals[2] / evals[1]))+ 
        scale_color_discrete(name="Health Condition",
                         breaks=c("SA1", "SA3"),
                         labels=c("Control", "Sjogren's"))

fig1d=plot_ordination(indexRA, index.pcoa, type="samples", color="Subject") + geom_point(size=2, alpha=0.1) + 
        theme_bw() + facet_wrap(~Tooth_Aspect)+coord_fixed(sqrt(evals[2] / evals[1]))+ 
        scale_color_discrete(name="Subject",
                         breaks=c("1-101", "1-102", "1-103", "1-104", "1-105", "1-106", "3-301", "3-302", "3-303"),
                         labels=c("Control 1", "Control 2", "Control 3", "Control 4", "Control 5", "Control 6",
                                  "Sjogren's 1", "Sjogren's 2", "Sjogren's 3"))
#plot the taxa
p = plot_ordination(indexRA, index.pcoa, "taxa")
    df = data.frame(p$data, tax_table(indexRA), taxa_sums(index25))
    colnames(df)[17] = "sum"
    rownames(df) = NULL
    df$Highest = paste0(df$Genus, " ", df$Species)
    df$Highest = str_replace_all(df$Highest, "NA", "")

    fig1e=ggplot(df, aes(Axis.1, Axis.2, label=Highest, color=Phylum))+coord_fixed(sqrt(evals[2] / evals[1]))+
          xlab("Axis 1 [24.1%]")  + ylab("Axis 2 [10.8%]") +  geom_text_repel(aes(Axis.1, Axis.2, label = Highest), size = 3)
    
  
grid.arrange(fig1b, fig1e, ncol=2)

ggsave(grid.arrange(fig1b, fig1e, ncol=1),  file="~/Desktop/Figure1.pdf",  width = 12, height = 10, units ="in",dpi = 300)

Now let’s quantify the segregation of samples using an adonis on Bray Curtis.

library(vegan)
## Warning: package 'vegan' was built under R version 3.4.4
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.5-2
otus = data.frame(otu_table(index25))
otus.h = decostand(otus, "hell")
attach(sample_data(index25))
## The following object is masked _by_ .GlobalEnv:
## 
##     x
set.seed(12)
index.adonis <- adonis(otus.h ~Habitat_Class*Aim*Tooth_Class*Tooth_Aspect, permutations=1000, strata = Subject) 
df <- format(data.frame(index.adonis$aov.tab), digits=2)
df
##                                             Df SumsOfSqs MeanSqs F.Model
## Habitat_Class                                1     21.60   21.60  134.34
## Aim                                          1      9.07    9.07   56.44
## Tooth_Class                                  1      1.05    1.05    6.51
## Tooth_Aspect                                 1      1.08    1.08    6.73
## Habitat_Class:Aim                            1      2.12    2.12   13.16
## Habitat_Class:Tooth_Class                    1      0.34    0.34    2.13
## Aim:Tooth_Class                              1      0.53    0.53    3.29
## Habitat_Class:Tooth_Aspect                   1      0.44    0.44    2.71
## Aim:Tooth_Aspect                             1      0.26    0.26    1.64
## Tooth_Class:Tooth_Aspect                     1      0.41    0.41    2.54
## Habitat_Class:Aim:Tooth_Class                1      0.17    0.17    1.09
## Habitat_Class:Aim:Tooth_Aspect               1      0.15    0.15    0.93
## Habitat_Class:Tooth_Class:Tooth_Aspect       1      0.27    0.27    1.67
## Aim:Tooth_Class:Tooth_Aspect                 1      0.19    0.19    1.17
## Habitat_Class:Aim:Tooth_Class:Tooth_Aspect   1      0.22    0.22    1.36
## Residuals                                  263     42.28    0.16      NA
## Total                                      278     80.18      NA      NA
##                                                R2 Pr..F.
## Habitat_Class                              0.2694  0.001
## Aim                                        0.1132  0.001
## Tooth_Class                                0.0130  0.001
## Tooth_Aspect                               0.0135  0.001
## Habitat_Class:Aim                          0.0264  0.001
## Habitat_Class:Tooth_Class                  0.0043  0.033
## Aim:Tooth_Class                            0.0066  0.005
## Habitat_Class:Tooth_Aspect                 0.0054  0.006
## Aim:Tooth_Aspect                           0.0033  0.053
## Tooth_Class:Tooth_Aspect                   0.0051  0.010
## Habitat_Class:Aim:Tooth_Class              0.0022  0.161
## Habitat_Class:Aim:Tooth_Aspect             0.0019  0.234
## Habitat_Class:Tooth_Class:Tooth_Aspect     0.0034  0.064
## Aim:Tooth_Class:Tooth_Aspect               0.0024  0.120
## Habitat_Class:Aim:Tooth_Class:Tooth_Aspect 0.0027  0.092
## Residuals                                  0.5273     NA
## Total                                      1.0000     NA
detach(sample_data(index25))

Figure 2: Do subgingival communities conform the gradient

Figure 2A: We see that communities differ by tooth class at supragingival sites, but not as much by subgingival sites. Interestingly, subgingival diversity appears to be most variability between molar sites within the healthy control group.

#### 1. Compute the third-order orthogonal polynomial terms
subs = levels(sample_data(index_subsites)$Habitat_Class)
otu.list <- vector("list", length(subs))
names(otu.list) <- subs

for(i in 1:length(subs)){

          #hellinger transform the data
          library(vegan)
          x1 = subset_samples(index_subsites, Habitat_Class==subs[[i]])
          x1 = prune_taxa(taxa_sums(x1) > 0, x1)
          x1 = prune_samples(sample_sums(x1) > 0, x1)
          otus = data.frame(otu_table(x1))
          otus.h = decostand(otus, "hellinger")
          map = sample_data(x1)
          map$x = as.numeric(as.character(map$x))
          map$y = as.numeric(as.character(map$y))
          
          #center the coordinates
          xygrid = cbind(map$x, map$y)
          xygrid.c <- scale(xygrid, scale=FALSE)
          
          ### Compute the third-order orthogonal polynomial function using centered geographic coordinates
          poly.xy3 <- poly(xygrid.c, degree = 3, raw=FALSE) #, coefs=TRUE) 
          colnames(poly.xy3) <- c("X", "X^2", "X^3", "Y", "XY", "X^2Y", "Y^2", "XY^2", "Y^3")
          poly.xy3.df <- data.frame(poly.xy3, map$x, map$y)
          
          library(ade4)
          #perform the RDA on the hellinger transformed data; extract the coordinates 
          rld.pca <- dudi.pca(otus.h , center=TRUE, scale=TRUE, scannf=FALSE, nf=10)
          rld.xy3 <- pcaiv(rld.pca, poly.xy3, scannf = FALSE, nf = 6)
          rld.xy3.df <- data.frame(rld.xy3$ls, map)
          xy3.df <- data.frame(rld.xy3.df, poly.xy3)
          
          # how much of the variance does the trend surface model explain?
          rld.xy3.var <- sum(rld.xy3$eig)/sum(rld.pca$eig)*100
          rld.xy3.var
          
          #look at the eigenvalues
          rld.xy3$eig
          
          #look at the screeplot
          screeplot(rld.xy3) #we should look at the first 3 axes
          
          #what is the percent of ewxplained variance
          Explainedvariance = rld.xy3$eig/sum(rld.xy3$eig)*100
          Explainedvariance
          
          #force x, y to numeric
          xy3.df$x <- as.numeric(as.character(xy3.df$x))
          xy3.df$y <- as.numeric(as.character(xy3.df$y))
          xy3.df$subsetsite = subs[[i]]
          otu.list[[i]] = xy3.df
          
}
## 
## Attaching package: 'ade4'
## The following object is masked from 'package:GenomicRanges':
## 
##     score
## The following object is masked from 'package:IRanges':
## 
##     score
## The following object is masked from 'package:BiocGenerics':
## 
##     score

xy3.df = do.call("rbind", otu.list)
### Force variables to order in ggplot
order=1:32
xy3.df$Tooth_Number <- factor(xy3.df$Tooth_Number, as.character(order))
xy3.df$tclass = xy3.df$Tooth_Class
xy3.df$tclass = str_replace_all(xy3.df$tclass, "Incisor_Central", "Incisor")
xy3.df$tclass = str_replace_all(xy3.df$tclass, "Incisor_Lateral", "Incisor")

#plot axis 1
cbPalette <- c("grey76", "Salmon", "#00BFC4", "grey76")
sub.df = subset(xy3.df, Habitat_Class=="Sub")
supra.df = subset(xy3.df, Habitat_Class=="Supra")



fig2a = ggplot(supra.df, aes(Tooth_Number, Axis1, group=Aim))  + theme_bw() + ylab("Axis 1") + 
      facet_wrap(Habitat_Class~Aim)+ xlab("Tooth")  + geom_point() + geom_smooth() + ggtitle("a)")+ 
      scale_x_discrete(breaks = seq(from=0, to=30, by=5), labels=seq(from=0, to=30, by=5))

fig2b = ggplot(sub.df, aes(Tooth_Number, Axis1, group=Aim))  + theme_bw() + ylab("Axis 1") + 
      facet_wrap(Habitat_Class~Aim)+ xlab("Tooth")  + geom_point() + geom_smooth() + ggtitle("b)")+ 
      scale_x_discrete(breaks = seq(from=0, to=30, by=5), labels=seq(from=0, to=30, by=5))

grid.arrange(fig2a, fig2b, ncol=2)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggsave(grid.arrange(fig2a, fig2b, ncol=2),  file="~/Desktop/Figure2.pdf",  width = 6, height = 3, units ="in",dpi = 300)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Figure 4: Covariation of sub and supragingival communities

#let's clean the data up so that we retain only samples present in an individual mouth at both sub/supra sites
subgingival.phy = subset_samples(decontam.phy, Habitat_Class=="Sub")
foo = sample_data(subgingival.phy)$Habitat
foo = colsplit(foo, "([_])", c("junk", "Habitat"))
sample_data(subgingival.phy)$Habitat.specific = foo$Habitat

#subgingival
    P1.1a = subset_samples(subgingival.phy, Subject=="1-101") #20
    P1.2a = subset_samples(subgingival.phy, Subject=="1-102") #22
    P1.3a = subset_samples(subgingival.phy, Subject=="1-104") #24
    P1.4a = subset_samples(subgingival.phy, Subject=="1-105") #24
    P1.5a =  subset_samples(subgingival.phy, Subject=="1-106" & Replicate=="R00") #19
    P1.5a = subset_samples(P1.5a, Habitat !="Sub_09L")
    P1.6a = subset_samples(subgingival.phy, Subject=="3-301") #24
    P1.7a = subset_samples(subgingival.phy, Subject=="3-302") #24
    P1.8a = subset_samples(subgingival.phy, Subject=="3-303") #
    statis_subgingival = merge_phyloseq(P1.1a, P1.2a, P1.3a, P1.4a, P1.5a, P1.6a, P1.7a, P1.8a)
    statis_subgingival= prune_samples(sample_sums(statis_subgingival) > 0, statis_subgingival)
    statis_subgingival
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 625 taxa and 182 samples ]
## sample_data() Sample Data:       [ 182 samples by 31 sample variables ]
## tax_table()   Taxonomy Table:    [ 625 taxa by 10 taxonomic ranks ]
keep = levels(sample_data(subgingival.phy)$Tooth_Number)
supragingival.phy = subset_samples(decontam.phy, Habitat_Class=="Supra")
supragingival.phy=subset_samples(supragingival.phy, Tooth_Number %in%  keep)
foo = sample_data(supragingival.phy)$Habitat
foo = colsplit(foo, "([_])", c("junk", "Habitat"))
sample_data(supragingival.phy)$Habitat.specific = foo$Habitat


#supragingival
    P1.1b = subset_samples(supragingival.phy, Subject=="1-101"  &  !(Habitat %in% c("Supra_08B", "Supra_08L" ,"Supra_09B" ,"Supra_09L" )))
    P1.2b = subset_samples(supragingival.phy, Subject=="1-102" & !(Habitat %in% c("Supra_27L"))) #25
    P1.3b = subset_samples(supragingival.phy, Subject=="1-104") #24
    P1.4b = subset_samples(supragingival.phy, Subject=="1-105") #24
    P1.5b =  subset_samples(supragingival.phy, Subject=="1-106"  & !(Habitat %in% c( "Supra_27B", "Supra_27L", "Supra_30B" ,"Supra_30L")))
    P1.6b = subset_samples(supragingival.phy, Subject=="3-301") #24
    P1.7b = subset_samples(supragingival.phy, Subject=="3-302") #24
    P1.8b = subset_samples(supragingival.phy, Subject=="3-303") #& !(Habitat %in% c("Supra_06B" "Supra_06L"
    
    
    statis_supragingival = merge_phyloseq(P1.1b, P1.2b, P1.3b, P1.4b, P1.5b, P1.6b, P1.7b, P1.8b)
    statis_supragingival = subset_samples(statis_supragingival, Replicate !="R04")
    statis_supragingival= prune_samples(sample_sums(statis_supragingival) > 0, statis_supragingival)

#make a new phyloseq object     
statis = merge_phyloseq(statis_supragingival, statis_subgingival)
#remove taxa present in fewert han 25% of samples    
filtergroup = filterfun(kOverA(k=70, A=10)) #k = number of samples; A = abundance
        sub.filt = filter_taxa(statis, filtergroup, prune=TRUE) 
        sub.filt = prune_samples(sample_sums(sub.filt) > 0, sub.filt) 
        sub.filt
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 83 taxa and 364 samples ]
## sample_data() Sample Data:       [ 364 samples by 31 sample variables ]
## tax_table()   Taxonomy Table:    [ 83 taxa by 10 taxonomic ranks ]
# read in the perio data
s101p = read.csv("~/Dropbox/Hyposalivation_UCSF-StanfordFileSharing/SA1_data/Bopcal_SA1/CSV/REDCap17_ESE_bopcal_SA1_v5.2_20140731_DD.csv")
s102p = read.csv("~/Dropbox/Hyposalivation_UCSF-StanfordFileSharing/SA1_data/Bopcal_SA1/CSV/REDCap18_ESE_bopcal_SA1_v5.2_20140805_DD.csv")
s104p = read.csv("~/Dropbox/Hyposalivation_UCSF-StanfordFileSharing/SA1_data/Bopcal_SA1/CSV/REDCap39_ESE_bopcal_SA1_v5.2_20140925_DD.csv")
s105p = read.csv("~/Dropbox/Hyposalivation_UCSF-StanfordFileSharing/SA1_data/Bopcal_SA1/CSV/REDCap49_ESE_bopcal_SA1_v5.2_20141021_DD.csv")
s106p = read.csv("~/Dropbox/Hyposalivation_UCSF-StanfordFileSharing/SA1_data/Bopcal_SA1/CSV/REDCap63_ESE_bopcal_SA1_v5.2_20141104_DD.csv")
s301p = read.csv("~/Dropbox/Hyposalivation_UCSF-StanfordFileSharing/SA3_data/Bopcal_SA3/CSV/REDCap66_ESE_bopcal_SA3_v5.2_20141120_DD.csv")
s302p = read.csv("~/Dropbox/Hyposalivation_UCSF-StanfordFileSharing/SA3_data/Bopcal_SA3/CSV/REDCap178_ESE_bopcal_SA3_v5.2_20150528_DD.csv")
s303p = read.csv("~/Dropbox/Hyposalivation_UCSF-StanfordFileSharing/SA3_data/Bopcal_SA3/CSV/REDCap219_ESE_bopcal_SA3_v5.2_20150818_SK.csv")

perio.df = data.frame(rbind(s101p, s102p, s104p, s105p, s106p, s301p, s302p, s303p))
perio.df = subset(perio.df, pd !="Missing")
perio.df = subset(perio.df, tooth !="NA")

pd = doBy::summaryBy(pd~redcap+tooth, data=perio.df, FUN=mean)
colnames(pd) = c("redcap", "Tooth_Number", "PD")

cal = doBy::summaryBy(cal~redcap+tooth, data=perio.df, FUN=mean)
colnames(cal) = c("redcap", "Tooth_Number", "CAL")

bop = doBy::summaryBy(percent.bop~redcap+tooth, data=perio.df, FUN=mean)
colnames(bop) = c("redcap", "Tooth_Number", "BOP")

#read in the caries data #
s101c = read.csv("~/Dropbox/Hyposalivation_UCSF-StanfordFileSharing/SA1_data/Axium_Data_SA1/CSV/REDCap17_ESE_Axium_SA1_v1.2_20140731_DD.csv")
s102c = read.csv("~/Dropbox/Hyposalivation_UCSF-StanfordFileSharing/SA1_data/Axium_Data_SA1/CSV/REDCap18_ESE_Axium_SA1_v1.2_20140819_DP.csv")
s104c = read.csv("~/Dropbox/Hyposalivation_UCSF-StanfordFileSharing/SA1_data/Axium_Data_SA1/CSV/REDCap39_ESE_Axium_SA1_v1.2_20140925_DD.csv")
s105c = read.csv("~/Dropbox/Hyposalivation_UCSF-StanfordFileSharing/SA1_data/Axium_Data_SA1/CSV/REDCap49_ESE_Axium_SA1_v1.2_20141021_DD.csv")
s106c = read.csv("~/Dropbox/Hyposalivation_UCSF-StanfordFileSharing/SA1_data/Axium_Data_SA1/CSV/REDCap63_ESE_Axium_SA1_v1.2_20141104_DD.csv")
s301c = read.csv("~/Dropbox/Hyposalivation_UCSF-StanfordFileSharing/SA3_data/Axium_Data_SA3/CSV/REDCap66_ESE_Axium_SA3_v1.2_20141120_DD.csv")
s302c = read.csv("~/Dropbox/Hyposalivation_UCSF-StanfordFileSharing/SA3_data/Axium_Data_SA3/CSV/REDCap178_ESE_Axium_SA3_v1.2_20150528_DD.csv")
s303c = read.csv("~/Dropbox/Hyposalivation_UCSF-StanfordFileSharing/SA3_data/Axium_Data_SA3/CSV/REDCap219_ESE_Axium_SA3_v1.2_20150818_SK.csv")

caries.df = data.frame(rbind(s101c, s102c, s104c, s105c, s106c, s301c, s302c, s303c))
caries.df = subset(caries.df, description !="MISSING")
caries.df = subset(caries.df, tooth !="NA")


caries.df$binary = ifelse(caries.df$description =="SOUND", 0, 1)
dmfs = summaryBy(binary~redcap + tooth, data=caries.df, FUN=sum)
colnames(dmfs) =c("redcap", "Tooth_Number",  "dmfs")


map = data.frame(sample_data(sub.filt))
pids = read.csv("~/Desktop/pids.csv")
colnames(pids) = c("redcap", "Subject")
flow = read.csv("~/Dropbox/Periodontology2000/flowrates.csv")

new.map = join(map, pids, "Subject")
new.map = join(new.map, pd)
## Joining by: Tooth_Number, redcap
new.map = join(new.map, cal)
## Joining by: Tooth_Number, redcap
new.map = join(new.map, bop)
## Joining by: Tooth_Number, redcap
new.map = join(new.map, flow)
## Joining by: Subject, redcap
new.map = join(new.map, dmfs)
## Joining by: Tooth_Number, redcap
rownames(new.map) = new.map$X.SampleID

sample_data(sub.filt) = sample_data(new.map)

Look at the clinical data

#set an analysis up for CCA
otus = otu_table(sub.filt)
otus = decostand(otus, "hell")
map1 = data.frame(sample_data(sub.filt)$Habitat_Class, sample_data(sub.filt)$UWS_FR, sample_data(sub.filt)$CAL, 
                  sample_data(sub.filt)$PD, sample_data(sub.filt)$BOP, sample_data(sub.filt)$dmfs)
                  
colnames(map1) = c("Class", "uws_fr",  "cal", "pd", "bop", "dmfs")
map1$Class = as.numeric(map1$Class)
subs = as.vector(sample_data(sub.filt)$Subject)
runs = as.vector(sample_data(sub.filt)$Pool_Name)

#for some reason this wworks on the command line but not in knitr
attach(map1)
## The following objects are masked _by_ .GlobalEnv:
## 
##     bop, cal, dmfs, pd
flow.cca = vegan::cca(otus ~ map1$Class* map1$uws_fr* map1$cal* map1$pd* map1$bop* map1$dmfs, sitenv=map1)


#run an anova on this
set.seed(100)
flow.aov = anova.cca(flow.cca, by="term", strata=subs)
flow.aov
## Permutation test for cca under reduced model
## Terms added sequentially (first to last)
## Blocks:  strata 
## Permutation: free
## Number of permutations: 999
## 
## Model: cca(formula = otus ~ map1$Class * map1$uws_fr * map1$cal * map1$pd * map1$bop * map1$dmfs, sitenv = map1)
##                                                     Df ChiSquare       F
## map1$Class                                           1   0.51056 96.6991
## map1$uws_fr                                          1   0.22654 42.9072
## map1$cal                                             1   0.05838 11.0570
## map1$pd                                              1   0.05290 10.0198
## map1$bop                                             1   0.12729 24.1085
## map1$dmfs                                            1   0.04008  7.5918
## map1$Class:map1$uws_fr                               1   0.10542 19.9672
## map1$Class:map1$cal                                  1   0.02503  4.7406
## map1$uws_fr:map1$cal                                 1   0.02665  5.0472
## map1$Class:map1$pd                                   1   0.02914  5.5197
## map1$uws_fr:map1$pd                                  1   0.03257  6.1691
## map1$cal:map1$pd                                     1   0.02653  5.0252
## map1$Class:map1$bop                                  1   0.04917  9.3122
## map1$uws_fr:map1$bop                                 1   0.09407 17.8159
## map1$cal:map1$bop                                    1   0.02315  4.3844
## map1$pd:map1$bop                                     1   0.01877  3.5544
## map1$Class:map1$dmfs                                 1   0.01971  3.7329
## map1$uws_fr:map1$dmfs                                1   0.01838  3.4819
## map1$cal:map1$dmfs                                   1   0.01418  2.6848
## map1$pd:map1$dmfs                                    1   0.01034  1.9583
## map1$bop:map1$dmfs                                   1   0.00816  1.5460
## map1$Class:map1$uws_fr:map1$cal                      1   0.01904  3.6064
## map1$Class:map1$uws_fr:map1$pd                       1   0.01876  3.5541
## map1$Class:map1$cal:map1$pd                          1   0.01659  3.1415
## map1$uws_fr:map1$cal:map1$pd                         1   0.01164  2.2045
## map1$Class:map1$uws_fr:map1$bop                      1   0.03583  6.7868
## map1$Class:map1$cal:map1$bop                         1   0.00936  1.7727
## map1$uws_fr:map1$cal:map1$bop                        1   0.01952  3.6975
## map1$Class:map1$pd:map1$bop                          1   0.00893  1.6919
## map1$uws_fr:map1$pd:map1$bop                         1   0.01250  2.3677
## map1$cal:map1$pd:map1$bop                            1   0.01523  2.8851
## map1$Class:map1$uws_fr:map1$dmfs                     1   0.01288  2.4388
## map1$Class:map1$cal:map1$dmfs                        1   0.00664  1.2585
## map1$uws_fr:map1$cal:map1$dmfs                       1   0.00875  1.6575
## map1$Class:map1$pd:map1$dmfs                         1   0.00621  1.1764
## map1$uws_fr:map1$pd:map1$dmfs                        1   0.00591  1.1185
## map1$cal:map1$pd:map1$dmfs                           1   0.00473  0.8951
## map1$Class:map1$bop:map1$dmfs                        1   0.00576  1.0913
## map1$uws_fr:map1$bop:map1$dmfs                       1   0.00698  1.3214
## map1$cal:map1$bop:map1$dmfs                          1   0.00984  1.8640
## map1$pd:map1$bop:map1$dmfs                           1   0.00577  1.0925
## map1$Class:map1$uws_fr:map1$cal:map1$pd              1   0.00943  1.7860
## map1$Class:map1$uws_fr:map1$cal:map1$bop             1   0.01183  2.2399
## map1$Class:map1$uws_fr:map1$pd:map1$bop              1   0.00628  1.1893
## map1$Class:map1$cal:map1$pd:map1$bop                 1   0.01003  1.8995
## map1$uws_fr:map1$cal:map1$pd:map1$bop                1   0.00642  1.2155
## map1$Class:map1$uws_fr:map1$cal:map1$dmfs            1   0.00685  1.2979
## map1$Class:map1$uws_fr:map1$pd:map1$dmfs             1   0.00375  0.7110
## map1$Class:map1$cal:map1$pd:map1$dmfs                1   0.00352  0.6663
## map1$uws_fr:map1$cal:map1$pd:map1$dmfs               1   0.00481  0.9104
## map1$Class:map1$uws_fr:map1$bop:map1$dmfs            1   0.00561  1.0628
## map1$Class:map1$cal:map1$bop:map1$dmfs               1   0.00392  0.7419
## map1$uws_fr:map1$cal:map1$bop:map1$dmfs              1   0.00632  1.1971
## map1$Class:map1$pd:map1$bop:map1$dmfs                1   0.00560  1.0601
## map1$uws_fr:map1$pd:map1$bop:map1$dmfs               1   0.00657  1.2434
## map1$cal:map1$pd:map1$bop:map1$dmfs                  1   0.01118  2.1174
## map1$Class:map1$uws_fr:map1$cal:map1$pd:map1$bop     1   0.00476  0.9014
## map1$Class:map1$uws_fr:map1$cal:map1$pd:map1$dmfs    1   0.00427  0.8082
## map1$Class:map1$uws_fr:map1$cal:map1$bop:map1$dmfs   1   0.00428  0.8100
## map1$Class:map1$uws_fr:map1$pd:map1$bop:map1$dmfs    1   0.00457  0.8652
## map1$Class:map1$cal:map1$pd:map1$bop:map1$dmfs       1   0.00728  1.3797
## Residual                                           302   1.59452        
##                                                    Pr(>F)    
## map1$Class                                          0.001 ***
## map1$uws_fr                                         0.001 ***
## map1$cal                                            0.001 ***
## map1$pd                                             0.001 ***
## map1$bop                                            0.001 ***
## map1$dmfs                                           0.004 ** 
## map1$Class:map1$uws_fr                              0.001 ***
## map1$Class:map1$cal                                 0.001 ***
## map1$uws_fr:map1$cal                                0.027 *  
## map1$Class:map1$pd                                  0.001 ***
## map1$uws_fr:map1$pd                                 0.001 ***
## map1$cal:map1$pd                                    0.035 *  
## map1$Class:map1$bop                                 0.001 ***
## map1$uws_fr:map1$bop                                0.001 ***
## map1$cal:map1$bop                                   0.144    
## map1$pd:map1$bop                                    0.124    
## map1$Class:map1$dmfs                                0.001 ***
## map1$uws_fr:map1$dmfs                               0.003 ** 
## map1$cal:map1$dmfs                                  0.112    
## map1$pd:map1$dmfs                                   0.125    
## map1$bop:map1$dmfs                                  0.344    
## map1$Class:map1$uws_fr:map1$cal                     0.001 ***
## map1$Class:map1$uws_fr:map1$pd                      0.001 ***
## map1$Class:map1$cal:map1$pd                         0.003 ** 
## map1$uws_fr:map1$cal:map1$pd                        0.407    
## map1$Class:map1$uws_fr:map1$bop                     0.001 ***
## map1$Class:map1$cal:map1$bop                        0.034 *  
## map1$uws_fr:map1$cal:map1$bop                       0.018 *  
## map1$Class:map1$pd:map1$bop                         0.047 *  
## map1$uws_fr:map1$pd:map1$bop                        0.033 *  
## map1$cal:map1$pd:map1$bop                           0.023 *  
## map1$Class:map1$uws_fr:map1$dmfs                    0.005 ** 
## map1$Class:map1$cal:map1$dmfs                       0.129    
## map1$uws_fr:map1$cal:map1$dmfs                      0.233    
## map1$Class:map1$pd:map1$dmfs                        0.181    
## map1$uws_fr:map1$pd:map1$dmfs                       0.328    
## map1$cal:map1$pd:map1$dmfs                          0.606    
## map1$Class:map1$bop:map1$dmfs                       0.280    
## map1$uws_fr:map1$bop:map1$dmfs                      0.301    
## map1$cal:map1$bop:map1$dmfs                         0.362    
## map1$pd:map1$bop:map1$dmfs                          0.660    
## map1$Class:map1$uws_fr:map1$cal:map1$pd             0.029 *  
## map1$Class:map1$uws_fr:map1$cal:map1$bop            0.010 ** 
## map1$Class:map1$uws_fr:map1$pd:map1$bop             0.176    
## map1$Class:map1$cal:map1$pd:map1$bop                0.033 *  
## map1$uws_fr:map1$cal:map1$pd:map1$bop               0.241    
## map1$Class:map1$uws_fr:map1$cal:map1$dmfs           0.139    
## map1$Class:map1$uws_fr:map1$pd:map1$dmfs            0.707    
## map1$Class:map1$cal:map1$pd:map1$dmfs               0.720    
## map1$uws_fr:map1$cal:map1$pd:map1$dmfs              0.757    
## map1$Class:map1$uws_fr:map1$bop:map1$dmfs           0.317    
## map1$Class:map1$cal:map1$bop:map1$dmfs              0.613    
## map1$uws_fr:map1$cal:map1$bop:map1$dmfs             0.789    
## map1$Class:map1$pd:map1$bop:map1$dmfs               0.255    
## map1$uws_fr:map1$pd:map1$bop:map1$dmfs              0.551    
## map1$cal:map1$pd:map1$bop:map1$dmfs                 0.099 .  
## map1$Class:map1$uws_fr:map1$cal:map1$pd:map1$bop    0.495    
## map1$Class:map1$uws_fr:map1$cal:map1$pd:map1$dmfs   0.646    
## map1$Class:map1$uws_fr:map1$cal:map1$bop:map1$dmfs  0.531    
## map1$Class:map1$uws_fr:map1$pd:map1$bop:map1$dmfs   0.667    
## map1$Class:map1$cal:map1$pd:map1$bop:map1$dmfs      0.126    
## Residual                                                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#how much variance does the model explain
eig = flow.cca$CCA$eig
eigp = 100*(eig/sum(eig))
head(eigp)
##      CCA1      CCA2      CCA3      CCA4      CCA5      CCA6 
## 27.671209 19.323712 10.865190  8.335889  6.015871  4.889771
#get the sample site scores and plot them
sites = data.frame(flow.cca$CCA$wa, sample_data(sub.filt))
sites$Aim = ifelse(sites$Aim=="SA1", "Control", "Sjogrens")


#convert the MFS score to factor
p1 = ggplot(sites, aes(CCA1, CCA2, color=as.factor(Aim))) + geom_point() + guides(color=guide_legend(title="Health Status")) + 
            ggtitle("a)")  + xlab("CCA1 (27.7%)") + ylab("CCA2 (19.3%)") + 
            theme(text = element_text(size=10, family="Helvetica"), axis.text.x = element_text(angle=00, vjust=1)) +
            coord_fixed(sqrt(eigp[2] / eigp[1])) 

p2 = ggplot(sites, aes(CCA1, CCA2, color=uws_fr)) + geom_point() + guides(color=guide_legend(title="uws_fr")) + 
            ggtitle("b)")  + xlab("CCA1 (27.7%)") + ylab("CCA2 (19.3%)") + 
            theme(text = element_text(size=10, family="Helvetica"), axis.text.x = element_text(angle=00, vjust=1)) +
            coord_fixed(sqrt(eigp[2] / eigp[1]))+ scale_colour_gradientn(colours=c("red", "blue"))

p3 = ggplot(sites, aes(CCA1, CCA2, color=dmfs)) + geom_point() + guides(color=guide_legend(title="dmfs")) + 
            ggtitle("c)")  + xlab("CCA1 (27.7%)") + ylab("CCA2 (19.3%)") + 
            theme(text = element_text(size=10, family="Helvetica"), axis.text.x = element_text(angle=00, vjust=1)) +
            coord_fixed(sqrt(eigp[2] / eigp[1]))+ scale_colour_gradientn(colours=c("red", "blue"))

p4 = ggplot(sites, aes(CCA1, CCA2, color=CAL)) + geom_point() + guides(color=guide_legend(title="CAL")) + 
            ggtitle("d)")  + xlab("CCA1 (27.7%)") + ylab("CCA2 (19.3%)") + 
            theme(text = element_text(size=10, family="Helvetica"), axis.text.x = element_text(angle=00, vjust=1)) +
            coord_fixed(sqrt(eigp[2] / eigp[1]))+ scale_colour_gradientn(colours=c("red", "blue"))

p5 = ggplot(sites, aes(CCA1, CCA2, color=BOP)) + geom_point() + guides(color=guide_legend(title="BOP")) + 
            ggtitle("e)")  + xlab("CCA1 (27.7%)") + ylab("CCA2 (19.3%)") + 
            theme(text = element_text(size=10, family="Helvetica"), axis.text.x = element_text(angle=00, vjust=1)) +
            coord_fixed(sqrt(eigp[2] / eigp[1]))+ scale_colour_gradientn(colours=c("red", "blue"))

p6 = ggplot(sites, aes(CCA1, CCA2, color=PD)) + geom_point() + guides(color=guide_legend(title="PD")) + 
            ggtitle("f)")  + xlab("CCA1 (27.7%)") + ylab("CCA2 (19.3%)") + 
            theme(text = element_text(size=10, family="Helvetica"), axis.text.x = element_text(angle=00, vjust=1)) +
            coord_fixed(sqrt(eigp[2] / eigp[1]))+ scale_colour_gradientn(colours=c("red", "blue"))

grid.arrange(p1, p2, p3, p4, p5, p6, ncol=2)

Look at the covariation between subgingival and supragingival sites and taxa

#subset on supragingival
supra = subset_samples(sub.filt, Habitat_Class=="Supra")
supra = prune_taxa(taxa_sums(supra) >1, supra)

#subset on subgingival
sub = subset_samples(sub.filt, Habitat_Class=="Sub")
sub = prune_taxa(taxa_sums(sub) >1, sub)


#transform
  Hstatis_subgingival = decostand(data.frame(otu_table(sub)), "hel")
  Hstatis_supragingival = decostand(data.frame(otu_table(supra)), "hel")

#generate the model
set.seed(981)
subs = sample_data(supra)$Subject
mod1 = CCorA(Hstatis_subgingival, Hstatis_supragingival, permutations=999)
biplot(mod1, plot.type="ov")

  eval = 100*(mod1$Eigenvalues/sum(mod1$Eigenvalues))


#plot subgingival samples
df1 = data.frame(mod1$Cx, sample_data(sub))
p1=ggplot(df1, aes(CanAxis1, CanAxis2, label=Subject, color=Aim)) + geom_text() + ggtitle(("a)")) + 
          theme_bw()+theme(text = element_text(size=20), axis.text.x = element_text(angle=0, vjust=1)) + 
          theme(legend.position="none") + xlab("3.1%")+ ylab("3.1%")

#plot supragingival samples
df2 = data.frame(mod1$Cy, sample_data(supra))
p2 = ggplot(df2, aes(CanAxis1, CanAxis2, label=Subject, color=Aim)) + geom_text() + ggtitle("b)") + 
         theme_bw()+theme(text = element_text(size=20), axis.text.x = element_text(angle=0, vjust=1)) + 
        theme(legend.position="none") + xlab("3.1%")+ ylab("3.1%")


#subgingival species
df3 = data.frame(mod1$corr.Y.Cy, tax_table(sub))
#write.csv()
myTax3 = df3$Highest.Rank
p3 = ggplot(df3, aes(CanAxis1, CanAxis2, label=rownames(df3), color=Phylum))  + 
          theme_bw()+theme(text = element_text(size=20), axis.text.x = element_text(angle=0, vjust=1))  + 
          theme(legend.position="bottom") + geom_text_repel(aes(CanAxis1, CanAxis2, label = myTax3), size = 3)



#supragingival species
df4 = data.frame(mod1$corr.X.Cy, tax_table(supra))
myTax4 = df4$Highest.Rank
p4 = ggplot(df4, aes(CanAxis1, CanAxis2, label=rownames(df4), color=Phylum))  + 
          theme_bw()+theme(text = element_text(size=20), axis.text.x = element_text(angle=0, vjust=1)) + 
          theme(legend.position="bottom")+ geom_text_repel(aes(CanAxis1, CanAxis2, label = myTax4), size = 3)


#plot
grid.arrange(p1, p2, p3, p4, ncol=2)

mod1
## 
## Canonical Correlation Analysis
## 
## Call:
## CCorA(Y = Hstatis_subgingival, X = Hstatis_supragingival, permutations = 999) 
## 
##               Y  X
## Matrix Ranks 63 82
## 
## Pillai's trace:  32.34643 
## 
## Significance of Pillai's trace:
## from F-distribution:   < 2.22e-16 
## based on permutations: 0.001 
## Permutation: free
## Number of permutations: 999
##  
##                        CanAxis1 CanAxis2 CanAxis3 CanAxis4 CanAxis5
## Canonical Correlations  0.99582  0.99207  0.98786  0.98271  0.97524
##                        CanAxis6 CanAxis7 CanAxis8 CanAxis9 CanAxis10
## Canonical Correlations  0.97257  0.96719  0.96234  0.95815   0.95326
##                        CanAxis11 CanAxis12 CanAxis13 CanAxis14 CanAxis15
## Canonical Correlations   0.94811   0.94029   0.93097   0.91924   0.91284
##                        CanAxis16 CanAxis17 CanAxis18 CanAxis19 CanAxis20
## Canonical Correlations   0.90369   0.89181   0.88321   0.87142   0.86111
##                        CanAxis21 CanAxis22 CanAxis23 CanAxis24 CanAxis25
## Canonical Correlations   0.85538   0.84064   0.83844   0.82316   0.80178
##                        CanAxis26 CanAxis27 CanAxis28 CanAxis29 CanAxis30
## Canonical Correlations   0.79894   0.79665   0.79129   0.76745   0.75951
##                        CanAxis31 CanAxis32 CanAxis33 CanAxis34 CanAxis35
## Canonical Correlations   0.73956   0.72540   0.72085   0.69162   0.67337
##                        CanAxis36 CanAxis37 CanAxis38 CanAxis39 CanAxis40
## Canonical Correlations   0.65051   0.64334   0.63501   0.61414   0.59257
##                        CanAxis41 CanAxis42 CanAxis43 CanAxis44 CanAxis45
## Canonical Correlations   0.57645   0.57584   0.55907   0.54152   0.52546
##                        CanAxis46 CanAxis47 CanAxis48 CanAxis49 CanAxis50
## Canonical Correlations   0.51112   0.49474   0.47702   0.43050   0.42409
##                        CanAxis51 CanAxis52 CanAxis53 CanAxis54 CanAxis55
## Canonical Correlations   0.38521   0.37556   0.33694   0.32497   0.29939
##                        CanAxis56 CanAxis57 CanAxis58 CanAxis59 CanAxis60
## Canonical Correlations   0.27436   0.25889   0.23370   0.21744   0.21106
##                        CanAxis61 CanAxis62 CanAxis63
## Canonical Correlations   0.16926   0.16713    0.1217
## 
##                     Y | X  X | Y
## RDA R squares      0.7303 0.6534
## adj. RDA R squares 0.5069 0.4684
#look at s.mutans
sm = subset_taxa(sub.filt, Highest.Rank=="Streptococcus_mutans")

df= data.frame(otu_table(sm), sample_data(sm))
dfm= melt(df, id.vars = colnames(sample_data(sm)))
ordering= c("3", "6","8","9", "11", "14","19", "22","24","25","27","30")
dfm$Tooth_Number  <- factor(dfm$Tooth_Number, levels = ordering)

p=ggplot(dfm,aes(Habitat_Class,value)) + geom_bar(stat="identity") + facet_wrap(~Aim)


p = ggplot(dfm,aes(Tooth_Number,PD, size=value, color=Tooth_Class)) + geom_point() + facet_wrap(Habitat_Class~Subject, ncol=8)
p

#Prevotella_histicola
ph = subset_taxa(sub.filt, Highest.Rank=="Prevotella_histicola")

df= data.frame(otu_table(ph), sample_data(ph))
dfm= melt(df, id.vars = colnames(sample_data(ph)))
ordering= c("3", "6","8","9", "11", "14","19", "22","24","25","27","30")
dfm$Tooth_Number  <- factor(dfm$Tooth_Number, levels = ordering)

p=ggplot(dfm,aes(Habitat_Class,value)) + geom_bar(stat="identity") + facet_wrap(~Aim)


p = ggplot(dfm,aes(Tooth_Number,PD, size=value, color=Tooth_Class)) + geom_point() + facet_wrap(Habitat_Class~Subject, ncol=8)
p

#look at Scardovia_wiggsiae
sm = subset_taxa(sub.filt, Highest.Rank=="Scardovia_wiggsiae")

df= data.frame(otu_table(sm), sample_data(sm))
dfm= melt(df, id.vars = colnames(sample_data(sm)))
ordering= c("3", "6","8","9", "11", "14","19", "22","24","25","27","30")
dfm$Tooth_Number  <- factor(dfm$Tooth_Number, levels = ordering)

p=ggplot(dfm,aes(Habitat_Class,value)) + geom_bar(stat="identity") + facet_wrap(~Aim)


p = ggplot(dfm,aes(Tooth_Number,PD, size=value, color=Tooth_Class)) + geom_point() + facet_wrap(Habitat_Class~Subject, ncol=8)
p

#look at Streptococcus_sanguinis
sm = subset_taxa(sub.filt, Highest.Rank=="Streptococcus_sanguinis")

df= data.frame(otu_table(sm), sample_data(sm))
dfm= melt(df, id.vars = colnames(sample_data(sm)))
ordering= c("3", "6","8","9", "11", "14","19", "22","24","25","27","30")
dfm$Tooth_Number  <- factor(dfm$Tooth_Number, levels = ordering)

p=ggplot(dfm,aes(Habitat_Class,value)) + geom_bar(stat="identity") + facet_wrap(~Aim)


p = ggplot(dfm,aes(Tooth_Number,PD, size=value, color=Tooth_Class)) + geom_point() + facet_wrap(Habitat_Class~Subject, ncol=8)
p

drop 3-301

#subset on supragingival
supra = subset_samples(sub.filt, Habitat_Class=="Supra")
supra = subset_samples(supra, Aim =="SA1")
supra = prune_taxa(taxa_sums(supra) >1, supra)

#subset on subgingival
sub = subset_samples(sub.filt, Habitat_Class=="Sub")
sub = subset_samples(sub, Aim=="SA1")
sub = prune_taxa(taxa_sums(sub) >1, sub)

#transform
  Hstatis_subgingival = decostand(data.frame(otu_table(sub)), "hel")
  Hstatis_supragingival = decostand(data.frame(otu_table(supra)), "hel")

  
  eval = 100*(mod1$Eigenvalues/sum(mod1$Eigenvalues))

#generate the model
set.seed(981)
subs = sample_data(supra)$Subject
mod1 = CCorA(Hstatis_subgingival, Hstatis_supragingival, permutations=999)
biplot(mod1, plot.type="ov")

#plot subgingival samples
df1 = data.frame(mod1$Cx, sample_data(sub))
p1=ggplot(df1, aes(CanAxis1, CanAxis2, label=Subject, color=Subject)) + geom_text() + ggtitle(("a)")) + 
          theme_bw()+theme(text = element_text(size=20), axis.text.x = element_text(angle=0, vjust=1)) + 
          theme(legend.position="none") + xlab("3.1%")+ ylab("3.1%")

#plot supragingival samples
df2 = data.frame(mod1$Cy, sample_data(supra))
p2 = ggplot(df2, aes(CanAxis1, CanAxis2, label=Subject, color=Subject)) + geom_text() + ggtitle("b)") + 
         theme_bw()+theme(text = element_text(size=20), axis.text.x = element_text(angle=0, vjust=1)) + 
        theme(legend.position="none") + xlab("3.1%")+ ylab("3.1%")


#subgingival species
df3 = data.frame(mod1$corr.Y.Cy, tax_table(sub))
#write.csv()
myTax3 = df3$Highest.Rank
p3 = ggplot(df3, aes(CanAxis1, CanAxis2, label=rownames(df3), color=Phylum))  + 
          theme_bw()+theme(text = element_text(size=20), axis.text.x = element_text(angle=0, vjust=1))  + 
          theme(legend.position="bottom") + geom_text_repel(aes(CanAxis1, CanAxis2, label = myTax3), size = 3)



#supragingival species
df4 = data.frame(mod1$corr.X.Cy, tax_table(supra))
myTax4 = df4$Highest.Rank
p4 = ggplot(df4, aes(CanAxis1, CanAxis2, label=rownames(df4), color=Phylum))  + 
          theme_bw()+theme(text = element_text(size=20), axis.text.x = element_text(angle=0, vjust=1)) + 
          theme(legend.position="bottom")+ geom_text_repel(aes(CanAxis1, CanAxis2, label = myTax4), size = 3)


#plot
grid.arrange(p1, p2, p3, p4, ncol=2)

mod1
## 
## Canonical Correlation Analysis
## 
## Call:
## CCorA(Y = Hstatis_subgingival, X = Hstatis_supragingival, permutations = 999) 
## 
##               Y  X
## Matrix Ranks 63 82
## 
## Pillai's trace:  49.00043 
## 
## Significance of Pillai's trace:
## from F-distribution:   0.00020046 
## based on permutations: 0.001 
## Permutation: free
## Number of permutations: 999
##  
##                        CanAxis1 CanAxis2 CanAxis3 CanAxis4 CanAxis5
## Canonical Correlations  1.00000  1.00000  1.00000  1.00000  1.00000
##                        CanAxis6 CanAxis7 CanAxis8 CanAxis9 CanAxis10
## Canonical Correlations  1.00000  1.00000  1.00000  1.00000   1.00000
##                        CanAxis11 CanAxis12 CanAxis13 CanAxis14 CanAxis15
## Canonical Correlations   1.00000   1.00000   1.00000   1.00000   1.00000
##                        CanAxis16 CanAxis17 CanAxis18 CanAxis19 CanAxis20
## Canonical Correlations   1.00000   1.00000   1.00000   1.00000   1.00000
##                        CanAxis21 CanAxis22 CanAxis23 CanAxis24 CanAxis25
## Canonical Correlations   1.00000   1.00000   1.00000   1.00000   1.00000
##                        CanAxis26 CanAxis27 CanAxis28 CanAxis29 CanAxis30
## Canonical Correlations   1.00000   1.00000   1.00000   1.00000   1.00000
##                        CanAxis31 CanAxis32 CanAxis33 CanAxis34 CanAxis35
## Canonical Correlations   1.00000   1.00000   1.00000   1.00000   1.00000
##                        CanAxis36 CanAxis37 CanAxis38 CanAxis39 CanAxis40
## Canonical Correlations   1.00000   0.95843   0.94227   0.92481   0.90742
##                        CanAxis41 CanAxis42 CanAxis43 CanAxis44 CanAxis45
## Canonical Correlations   0.88583   0.86619   0.85130   0.83236   0.81504
##                        CanAxis46 CanAxis47 CanAxis48 CanAxis49 CanAxis50
## Canonical Correlations   0.79023   0.75572   0.74428   0.70769   0.67561
##                        CanAxis51 CanAxis52 CanAxis53 CanAxis54 CanAxis55
## Canonical Correlations   0.65663   0.64684   0.60128   0.58696   0.55685
##                        CanAxis56 CanAxis57 CanAxis58 CanAxis59 CanAxis60
## Canonical Correlations   0.52476   0.50010   0.43905   0.39938   0.38251
##                        CanAxis61 CanAxis62 CanAxis63
## Canonical Correlations   0.35834   0.32460    0.2611
## 
##                      Y | X  X | Y
## RDA R squares      0.84767 0.7117
## adj. RDA R squares 0.38504 0.3169
#look at s.mutans
sm = subset_taxa(sub.filt, Highest.Rank=="Streptococcus_mutans")

df= data.frame(otu_table(sm), sample_data(sm))
dfm= melt(df, id.vars = colnames(sample_data(sm)))
ordering= c("3", "6","8","9", "11", "14","19", "22","24","25","27","30")
dfm$Tooth_Number  <- factor(dfm$Tooth_Number, levels = ordering)

p=ggplot(dfm,aes(Habitat_Class,value)) + geom_bar(stat="identity") + facet_wrap(~Aim)


p = ggplot(dfm,aes(Tooth_Number,PD, size=value, color=Tooth_Class)) + geom_point() + facet_wrap(Habitat_Class~Subject, ncol=8)


#
ph = subset_taxa(sub.filt, Highest.Rank=="Prevotella_histicola")

df= data.frame(otu_table(ph), sample_data(ph))
dfm= melt(df, id.vars = colnames(sample_data(ph)))
ordering= c("3", "6","8","9", "11", "14","19", "22","24","25","27","30")
dfm$Tooth_Number  <- factor(dfm$Tooth_Number, levels = ordering)

p=ggplot(dfm,aes(Habitat_Class,value)) + geom_bar(stat="identity") + facet_wrap(~Aim)

p

p = ggplot(dfm,aes(Tooth_Number,PD, size=value, color=Tooth_Class)) + geom_point() + facet_wrap(Habitat_Class~Subject, ncol=8)

p